{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Tuning Gradient Boosting Trees"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "#Load this line within an IPython notebook to visualize within the notebook\n",
    "%matplotlib inline \n",
    "\n",
    "from __future__ import division #Load within Python 2.7 for regular division\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "from sklearn.datasets import fetch_california_housing\n",
    "\n",
    "# Load the California housing dataset.\n",
    "cali_housing = fetch_california_housing()\n",
    "\n",
    "X = cali_housing.data\n",
    "y = cali_housing.target\n",
    "\n",
    "# Bin the continuous target at the integer edges 0..5 so the split can be\n",
    "# stratified on the bins: train and test then get similar target distributions.\n",
    "bins = np.arange(6)\n",
    "binned_y = np.digitize(y, bins)\n",
    "\n",
    "from sklearn.model_selection import train_test_split\n",
    "\n",
    "# Fixed random_state so Restart & Run All reproduces the same 80/20 partition.\n",
    "X_train, X_test, y_train, y_test = train_test_split(\n",
    "    X, y, test_size=0.2, stratify=binned_y, random_state=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "RandomizedSearchCV(cv=3, error_score='raise',\n",
       "          estimator=GradientBoostingRegressor(alpha=0.9, criterion='friedman_mse', init=None,\n",
       "             learning_rate=0.1, loss='ls', max_depth=3, max_features=None,\n",
       "             max_leaf_nodes=None, min_impurity_decrease=0.0,\n",
       "             min_impurity_split=None, min_samples_leaf=1,\n",
       "             min_samples_split=2, min_weight_fraction_leaf=0.0,\n",
       "             n_estimators=100, presort='auto', random_state=None,\n",
       "             subsample=1.0, verbose=0, warm_start=True),\n",
       "          fit_params=None, iid=True, n_iter=30, n_jobs=-1,\n",
       "          param_distributions={'loss': ['ls', 'huber'], 'learning_rate': [0.0001, 0.001, 0.01, 0.05, 0.1, 0.3], 'min_samples_leaf': [2, 3, 5, 10], 'n_estimators': [50, 100], 'max_features': ['log2', 1.0], 'max_depth': [3, 5, 7, 10]},\n",
       "          pre_dispatch='2*n_jobs', random_state=None, refit=True,\n",
       "          return_train_score=True, scoring=None, verbose=0)"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sklearn.ensemble import GradientBoostingRegressor\n",
    "from sklearn.model_selection import RandomizedSearchCV\n",
    "\n",
    "# First-pass search space: wide ranges for every tuned hyperparameter.\n",
    "param_dist = {\n",
    "    'max_features': ['log2', 1.0],\n",
    "    'max_depth': [3, 5, 7, 10],\n",
    "    'min_samples_leaf': [2, 3, 5, 10],\n",
    "    'n_estimators': [50, 100],\n",
    "    'learning_rate': [0.0001, 0.001, 0.01, 0.05, 0.1, 0.3],\n",
    "    'loss': ['ls', 'huber'],\n",
    "}\n",
    "\n",
    "# random_state is fixed on both the search and the estimator so that the\n",
    "# 30 sampled candidates and their scores are the same on every run.\n",
    "pre_gs_inst = RandomizedSearchCV(\n",
    "    GradientBoostingRegressor(warm_start=True, random_state=0),\n",
    "    param_distributions=param_dist,\n",
    "    cv=3,\n",
    "    n_iter=30,\n",
    "    n_jobs=-1,\n",
    "    random_state=0)\n",
    "pre_gs_inst.fit(X_train, y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Grid CV Report \n",
      "\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>param_type</th>\n",
       "      <th>param_set</th>\n",
       "      <th>mean_score</th>\n",
       "      <th>mean_std</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>param_loss</td>\n",
       "      <td>huber</td>\n",
       "      <td>0.409401</td>\n",
       "      <td>0.346610</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>param_loss</td>\n",
       "      <td>ls</td>\n",
       "      <td>0.384585</td>\n",
       "      <td>0.340986</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>param_min_samples_leaf</td>\n",
       "      <td>2</td>\n",
       "      <td>0.464249</td>\n",
       "      <td>0.357226</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>param_min_samples_leaf</td>\n",
       "      <td>3</td>\n",
       "      <td>0.504020</td>\n",
       "      <td>0.267353</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>param_min_samples_leaf</td>\n",
       "      <td>5</td>\n",
       "      <td>0.363231</td>\n",
       "      <td>0.388734</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>param_min_samples_leaf</td>\n",
       "      <td>10</td>\n",
       "      <td>-0.004219</td>\n",
       "      <td>0.024699</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>param_max_depth</td>\n",
       "      <td>3</td>\n",
       "      <td>0.407668</td>\n",
       "      <td>0.299136</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>param_max_depth</td>\n",
       "      <td>5</td>\n",
       "      <td>0.365563</td>\n",
       "      <td>0.384754</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>param_max_depth</td>\n",
       "      <td>7</td>\n",
       "      <td>0.396002</td>\n",
       "      <td>0.370248</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>param_max_depth</td>\n",
       "      <td>10</td>\n",
       "      <td>0.422357</td>\n",
       "      <td>0.398164</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>param_learning_rate</td>\n",
       "      <td>0.0001</td>\n",
       "      <td>-0.011166</td>\n",
       "      <td>0.026935</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>param_learning_rate</td>\n",
       "      <td>0.001</td>\n",
       "      <td>0.057083</td>\n",
       "      <td>0.038219</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>param_learning_rate</td>\n",
       "      <td>0.01</td>\n",
       "      <td>0.469808</td>\n",
       "      <td>0.072167</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>param_learning_rate</td>\n",
       "      <td>0.05</td>\n",
       "      <td>0.764856</td>\n",
       "      <td>0.060034</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>param_learning_rate</td>\n",
       "      <td>0.1</td>\n",
       "      <td>0.787213</td>\n",
       "      <td>0.043201</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>param_learning_rate</td>\n",
       "      <td>0.3</td>\n",
       "      <td>0.817437</td>\n",
       "      <td>0.008105</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>param_max_features</td>\n",
       "      <td>1</td>\n",
       "      <td>0.409213</td>\n",
       "      <td>0.365447</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>param_max_features</td>\n",
       "      <td>log2</td>\n",
       "      <td>0.381256</td>\n",
       "      <td>0.316394</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>param_n_estimators</td>\n",
       "      <td>50</td>\n",
       "      <td>0.353859</td>\n",
       "      <td>0.361134</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>param_n_estimators</td>\n",
       "      <td>100</td>\n",
       "      <td>0.451491</td>\n",
       "      <td>0.310050</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                param_type param_set  mean_score  mean_std\n",
       "0               param_loss     huber    0.409401  0.346610\n",
       "1               param_loss        ls    0.384585  0.340986\n",
       "2   param_min_samples_leaf         2    0.464249  0.357226\n",
       "3   param_min_samples_leaf         3    0.504020  0.267353\n",
       "4   param_min_samples_leaf         5    0.363231  0.388734\n",
       "5   param_min_samples_leaf        10   -0.004219  0.024699\n",
       "6          param_max_depth         3    0.407668  0.299136\n",
       "7          param_max_depth         5    0.365563  0.384754\n",
       "8          param_max_depth         7    0.396002  0.370248\n",
       "9          param_max_depth        10    0.422357  0.398164\n",
       "10     param_learning_rate    0.0001   -0.011166  0.026935\n",
       "11     param_learning_rate     0.001    0.057083  0.038219\n",
       "12     param_learning_rate      0.01    0.469808  0.072167\n",
       "13     param_learning_rate      0.05    0.764856  0.060034\n",
       "14     param_learning_rate       0.1    0.787213  0.043201\n",
       "15     param_learning_rate       0.3    0.817437  0.008105\n",
       "16      param_max_features         1    0.409213  0.365447\n",
       "17      param_max_features      log2    0.381256  0.316394\n",
       "18      param_n_estimators        50    0.353859  0.361134\n",
       "19      param_n_estimators       100    0.451491  0.310050"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "\n",
    "def get_grid_df(fitted_gs_estimator):\n",
    "    res_dict = fitted_gs_estimator.cv_results_\n",
    " \n",
    "    results_df = pd.DataFrame()\n",
    "    for key in res_dict.keys():\n",
    "         results_df[key] = res_dict[key]\n",
    " \n",
    "    return results_df\n",
    "\n",
    "def group_report(results_df):\n",
    "    param_cols = [x for x in results_df.columns if 'param' in x and x is not 'params']\n",
    "    focus_cols = param_cols + ['mean_test_score']\n",
    " \n",
    "    print \"Grid CV Report \\n\"\n",
    " \n",
    "    output_df = pd.DataFrame(columns = ['param_type','param_set', 'mean_score','mean_std'])\n",
    "    cc = 0\n",
    "    for param in param_cols:\n",
    "        for key,group in results_df.groupby(param):\n",
    "            output_df.loc[cc] = (param, key, group['mean_test_score'].mean(), group['mean_test_score'].std())\n",
    "            cc += 1\n",
    "    return output_df\n",
    "\n",
    "# Tabulate the fitted search's cv_results_ and summarise the mean test\n",
    "# score for each value of each searched parameter.\n",
    "results_df = get_grid_df(pre_gs_inst)\n",
    "group_report(results_df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "RandomizedSearchCV(cv=3, error_score='raise',\n",
       "          estimator=GradientBoostingRegressor(alpha=0.9, criterion='friedman_mse', init=None,\n",
       "             learning_rate=0.1, loss='ls', max_depth=3, max_features=None,\n",
       "             max_leaf_nodes=None, min_impurity_decrease=0.0,\n",
       "             min_impurity_split=None, min_samples_leaf=1,\n",
       "             min_samples_split=2, min_weight_fraction_leaf=0.0,\n",
       "             n_estimators=100, presort='auto', random_state=None,\n",
       "             subsample=1.0, verbose=0, warm_start=True),\n",
       "          fit_params=None, iid=True, n_iter=30, n_jobs=-1,\n",
       "          param_distributions={'loss': ['ls', 'huber'], 'learning_rate': [0.2, 0.25, 0.3, 0.4], 'min_samples_leaf': [3, 4], 'n_estimators': [50, 100], 'max_features': ['sqrt', 0.5, 1.0], 'max_depth': [2, 3, 4]},\n",
       "          pre_dispatch='2*n_jobs', random_state=None, refit=True,\n",
       "          return_train_score=True, scoring=None, verbose=0)"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Second-pass search space: narrower ranges than the first search.\n",
    "param_dist = {\n",
    "    'max_features': ['sqrt', 0.5, 1.0],\n",
    "    'max_depth': [2, 3, 4],\n",
    "    'min_samples_leaf': [3, 4],\n",
    "    'n_estimators': [50, 100],\n",
    "    'learning_rate': [0.2, 0.25, 0.3, 0.4],\n",
    "    'loss': ['ls', 'huber'],\n",
    "}\n",
    "\n",
    "# random_state is fixed on both the search and the estimator so that the\n",
    "# 30 sampled candidates and their scores are the same on every run.\n",
    "pre_gs_inst = RandomizedSearchCV(\n",
    "    GradientBoostingRegressor(warm_start=True, random_state=0),\n",
    "    param_distributions=param_dist,\n",
    "    cv=3,\n",
    "    n_iter=30,\n",
    "    n_jobs=-1,\n",
    "    random_state=0)\n",
    "pre_gs_inst.fit(X_train, y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Grid CV Report \n",
      "\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>param_type</th>\n",
       "      <th>param_set</th>\n",
       "      <th>mean_score</th>\n",
       "      <th>mean_std</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>param_loss</td>\n",
       "      <td>huber</td>\n",
       "      <td>0.779943</td>\n",
       "      <td>0.022787</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>param_loss</td>\n",
       "      <td>ls</td>\n",
       "      <td>0.783039</td>\n",
       "      <td>0.031233</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>param_min_samples_leaf</td>\n",
       "      <td>3</td>\n",
       "      <td>0.776799</td>\n",
       "      <td>0.025490</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>param_min_samples_leaf</td>\n",
       "      <td>4</td>\n",
       "      <td>0.791408</td>\n",
       "      <td>0.025709</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>param_max_depth</td>\n",
       "      <td>2</td>\n",
       "      <td>0.760400</td>\n",
       "      <td>0.019848</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>param_max_depth</td>\n",
       "      <td>3</td>\n",
       "      <td>0.786706</td>\n",
       "      <td>0.013079</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>param_max_depth</td>\n",
       "      <td>4</td>\n",
       "      <td>0.808736</td>\n",
       "      <td>0.014967</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>param_learning_rate</td>\n",
       "      <td>0.2</td>\n",
       "      <td>0.773884</td>\n",
       "      <td>0.024891</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>param_learning_rate</td>\n",
       "      <td>0.25</td>\n",
       "      <td>0.782548</td>\n",
       "      <td>0.032352</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>param_learning_rate</td>\n",
       "      <td>0.3</td>\n",
       "      <td>0.788241</td>\n",
       "      <td>0.026682</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>param_learning_rate</td>\n",
       "      <td>0.4</td>\n",
       "      <td>0.794763</td>\n",
       "      <td>0.003527</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>param_max_features</td>\n",
       "      <td>0.5</td>\n",
       "      <td>0.790798</td>\n",
       "      <td>0.022962</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>param_max_features</td>\n",
       "      <td>1</td>\n",
       "      <td>0.787266</td>\n",
       "      <td>0.025484</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>param_max_features</td>\n",
       "      <td>sqrt</td>\n",
       "      <td>0.765834</td>\n",
       "      <td>0.024313</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>param_n_estimators</td>\n",
       "      <td>50</td>\n",
       "      <td>0.774525</td>\n",
       "      <td>0.022800</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>param_n_estimators</td>\n",
       "      <td>100</td>\n",
       "      <td>0.789887</td>\n",
       "      <td>0.028242</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                param_type param_set  mean_score  mean_std\n",
       "0               param_loss     huber    0.779943  0.022787\n",
       "1               param_loss        ls    0.783039  0.031233\n",
       "2   param_min_samples_leaf         3    0.776799  0.025490\n",
       "3   param_min_samples_leaf         4    0.791408  0.025709\n",
       "4          param_max_depth         2    0.760400  0.019848\n",
       "5          param_max_depth         3    0.786706  0.013079\n",
       "6          param_max_depth         4    0.808736  0.014967\n",
       "7      param_learning_rate       0.2    0.773884  0.024891\n",
       "8      param_learning_rate      0.25    0.782548  0.032352\n",
       "9      param_learning_rate       0.3    0.788241  0.026682\n",
       "10     param_learning_rate       0.4    0.794763  0.003527\n",
       "11      param_max_features       0.5    0.790798  0.022962\n",
       "12      param_max_features         1    0.787266  0.025484\n",
       "13      param_max_features      sqrt    0.765834  0.024313\n",
       "14      param_n_estimators        50    0.774525  0.022800\n",
       "15      param_n_estimators       100    0.789887  0.028242"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Same report as before, now for the most recently fitted pre_gs_inst.\n",
    "results_df = get_grid_df(pre_gs_inst)\n",
    "group_report(results_df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "GradientBoostingRegressor(alpha=0.9, criterion='friedman_mse', init=None,\n",
       "             learning_rate=0.3, loss='huber', max_depth=6,\n",
       "             max_features=0.5, max_leaf_nodes=None,\n",
       "             min_impurity_decrease=0.0, min_impurity_split=None,\n",
       "             min_samples_leaf=4, min_samples_split=2,\n",
       "             min_weight_fraction_leaf=0.0, n_estimators=4000,\n",
       "             presort='auto', random_state=None, subsample=1.0, verbose=0,\n",
       "             warm_start=True)"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# NOTE(review): this dict is not passed to anything in this cell -- the model\n",
    "# below is configured by hand. Confirm whether a search was meant to run here.\n",
    "param_dist = {\n",
    "    'max_features': [0.4, 0.5, 0.6],\n",
    "    'max_depth': [5, 6],\n",
    "    'min_samples_leaf': [4, 5],\n",
    "    'n_estimators': [300],\n",
    "    'learning_rate': [0.3],\n",
    "    'loss': ['ls', 'huber'],\n",
    "}\n",
    "\n",
    "# Final regressor. random_state is fixed so the fitted model (and the metrics\n",
    "# computed from it) are reproducible across runs.\n",
    "# NOTE(review): n_estimators=4000 is far above both the searched values\n",
    "# (50, 100) and the 300 listed above -- confirm this is intentional.\n",
    "rs_gbt = GradientBoostingRegressor(\n",
    "    warm_start=True,\n",
    "    max_features=0.5,\n",
    "    min_samples_leaf=4,\n",
    "    learning_rate=0.3,\n",
    "    max_depth=6,\n",
    "    n_estimators=4000,\n",
    "    loss='huber',\n",
    "    random_state=0)\n",
    "\n",
    "rs_gbt.fit(X_train, y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "R-squared 0.828251447771\n",
      "MAE :  0.309495256671\n",
      "MAPE :  0.169353509078\n"
     ]
    }
   ],
   "source": [
    "# Score the final regressor on the held-out rows.\n",
    "y_pred = rs_gbt.predict(X_test)\n",
    "\n",
    "from sklearn.metrics import r2_score, mean_absolute_error\n",
    "\n",
    "# Single-argument print(...) calls produce the same text on Python 2 and 3;\n",
    "# the spacing inside the strings matches what the print statements emitted.\n",
    "print('R-squared {}'.format(r2_score(y_test, y_pred)))\n",
    "print('MAE :  {}'.format(mean_absolute_error(y_test, y_pred)))\n",
    "# Mean absolute percentage error, expressed as a fraction of the true value.\n",
    "print('MAPE :  {}'.format((np.abs(y_test - y_pred) / y_test).mean()))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<matplotlib.axes._subplots.AxesSubplot at 0xd11a4e0>"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX0AAAEACAYAAABfxaZOAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAGYtJREFUeJzt3W+MZNV95vHvA7OAndjMyBHTmxmbxovBgxWnlyTDrpwo\nPbGDjRMNfhEhnH/uYOUN7MLGisUM+2J23wQTaWVvlBDJCpkeIhyM2VgQieABMTeRoxhw4omJh+CJ\nosbDxNNZhz+7xDJi1r99Ubfpqq6609W3/pxb5z4faTR9T53b53equk/dfurWLUUEZmbWDuelLsDM\nzKbHi76ZWYt40TczaxEv+mZmLeJF38ysRbzom5m1yKaLvqR7JK1K+npX229LelbScUn/S9Jbu247\nKOlkefu1Xe1XS/q6pG9K+sz4p2JmZpsZ5kj/MPDBDW1HgfdExAJwEjgIIOkq4AZgD3AdcLcklfv8\nPvDxiLgCuELSxu9pZmYTtumiHxFfBl7a0PZ4RHy/3PwKsLv8ej9wf0ScjYgVOk8IeyXNAW+JiKfL\nfvcCHxlD/WZmtgXjyPRvAh4pv94FnOq67XTZtgt4oav9hbLNzMymaKRFX9J/BV6PiD8eUz1mZjZB\n2+ruKGkJ+DDwM13Np4G3d23vLtuq2qu+ty8IZGZWQ0ToXLcPe6Sv8l9nQ/oQ8Elgf0S81tXvYeBG\nSRdIugy4HHgqIs4Ar0jaW76w+6vAQ5sUnu2/Q4cOJa/Bc/P8PL+8/h06dGioxXzTI31JnwMWgbdJ\n+hZwCLgDuAB4rDw55ysRcXNEnJD0AHACeB24OSLWjtpvAZaBi4BHIuLRoSrM0MrKSuoSJibnuYHn\nN+tynt+wc9t00Y+IXxzQfPgc/e8E7hzQ/tfAjwxVlZmZTYTfkZvA0tJS6hImJue5gec363Ke37Bz\n03r60hySool1mZk1mSRiTC/k2hgVRZG6hInJeW7g+c26nOc37Ny86JuZtYjjHTOzTDjeMTOzHl70\nE3CuOLs8v9mW8/yc6ZuZWR9n+mZmmXCmb2ZmPbzoJ+BccXZ5frMt5/k50zczsz7O9M3MMuFM38zM\nenjRT8C54uzy/GZbzvNzpm9mZn2c6ZuZZcKZvpmZ9fCin4Bzxdnl+c22nOfnTN/MzPo40zczy4Qz\nfTMz6+FFPwHnirPL85ttOc/Pmb6ZmfVxpm9mlgln+mZm1sOLfgLOFWeX5zfbcp7fsHPbNtkyzMym\nb25untXV5/vad+zYyYsvnklQUXNsmulLugf4eWA1It5btu0APg9cCqwAN0TEK+VtB4GbgLPAbRFx\ntGy/GlgGLgIeiYj/co4xnembWW2SgEFriMh5bRlXpn8Y+OCGtgPA4xFxJfAEcLAc8CrgBmAPcB1w\ntzr3PsDvAx+PiCuAKyRt/J5mZjZhmy76EfFl4KUNzdcDR8qvjwAfKb/eD9wfEWcjYgU4CeyVNAe8\nJSKeLvvd27VP6zhXnF2enzXVpM/TvyQiVgEi4gxwSdm+CzjV1e902bYLeKGr/YWyzczMpmhcZ+/k\nG5JNwOLiYuoSJibnuYHnZ8017GNX9+ydVUk7I2K1jG7+uWw/Dby9q9/usq2qvdLS0hLz8/MAbN++\nnYWFhTcmtfZnjLe97W1vV22vW9tuVn3j2C6KguXlZYA31stNRcSm/4B54Jmu7buA28uvbwc+VX59\nFfA14ALgMuAfWD9D6CvAXkDAI8CHzjFe5OzYsWOpS5iYnOcW4fnNCiAgBvzLd205duzY2vzOuZ5v\neqQv6XN0niLfJulbwCHgU8AXJN0EPE/njB0i4oSkB4ATwOvAzRFvnB91C72nbD463NOSmZmNi6+9\nY2bZ8Xn61XwZBjOzFvGin0D/C035yHlu4PlZcw372HnRNzNrEWf6LVV1QaqdOy/lzJmV6RdkNkbO\n9M/Rp4l3gBf9yWvrL4W1Q1t/vv1Cbk
PlnJvmPDfw/Ky5nOmbmVkfxzst1dY/f60d2vrz7XjHzMx6\neNFPIOfcNOe5gednzeVM394wNzePpJ5/ZtZOzvRbYHC+2c7M09rBmX41H+mbmbWIF/0Ecs5Nc54b\neH7WXM70zcysjzP9FnCmb23jTL+aj/TNzFrEi34COeemOc8NPD9rrmEfu00/I9dG40sYm1mTONOf\nsCZki870rW2a8HuXgjN9MzPr4UU/iSJ1AROTeybs+VlT+Tx9MzPr40x/wpqQLTrTt7Zpwu9dCs70\nzcyshxf9JIrUBUxM7pmw52dN5Uw/c4OukT83N5+6LDNrOGf6EzapbLEqpx/0PZ3pW9s406/mI30z\nsxYZadGX9BuS/k7S1yXdJ+kCSTskHZX0nKQvSbq4q/9BSSclPSvp2tHLn1VF6gImJvdM2POzppp4\npi/ph4H/DFwdEe+lcx2fjwIHgMcj4krgCeBg2f8q4AZgD3AdcLf8Ya1jdmFfzj+Ou9ivH5jlo3am\nXy76fwUsAP8X+BPgd4DfBX46IlYlzQFFRLxb0gEgIuKucv8/A/5bRDw54Hu3MtPfysXZtprTj5Lp\nb+X1A7MmcKZfrfaRfkT8E/A/gG8Bp4FXIuJxYGdErJZ9zgCXlLvsAk51fYvTZVs2Bh0Rb0VnwY++\nf4OeCMzM6qh9aWVJ24HrgUuBV4AvSPol+p9eaz2tLi0tMT8/D8D27dtZWFhgcXERWM+umra9vmgX\n5SwW6RxRd28DfKZnrv1Z3Mb+nT4bx+vvX71/p21xw+2D+2+c32b7d/fvri314zGJbc9vdrbXrW03\nq75xPV7Ly8tsxSjxzi8AH4yIXy+3fwX4D8DPAItd8c6xiNgzIN55FDiUU7wzfORSAPuGjFE632PY\nyCV1vNP95JQjz282tDHeKYqCffv2bRrvjLLo7wXuAX4CeA04DDwNvAN4MSLuknQ7sCMiDpQv5N4H\nXEMn1nkMeNeg1T3/Rb/TnuOib9YEbVz0YbhMv3a8ExFPSXoQ+Brwevn/Z4G3AA9Iugl4ns4ZO0TE\nCUkPACfK/jfP5MqevQvHcsaPmTXTSOfpR8R/j4g9EfHeiPhYRLweES9GxAci4sqIuDYiXu7qf2dE\nXF7uc3T08mdVkbqAc3iN/heTh9efp+bF87OmGvax8ztyzcxaxNfeGaNcMv3h+uadjdpsc6ZfzUf6\nZmYt4kU/iSJ1AROTeybs+VlTOdM3M7M+zvTHyJm+WTM406/mI30zsxbxop9EkbqAick9E/b8rKmc\n6ZuZWR9n+mPkTN+sGZzpV/ORvplZi3jRT6LYYv/+j0FsqtwzYc/PmmrYx672VTZtVFu5muXaRdC6\nNXfhN7PmcqY/Rk3I2ac31kV0nox6Dfo8X7Npc6Z/jj5NvAO86Nft24yxZvGxs7x40a/mTD+JInUB\nVlPumXfu88uZz9M3M7M+jnfGyPFOp30WHzvLi+Odaj7S38Tc3Hzf6ZKSmJubT12amdmWedHfxOrq\n8/R/ZmyU7XUVY6jMUsg98859fjlzpm9mZn2c6W9i1q6H04Sxhn3s5ubm+/5i8nn+Ng7O9M/Rp4l3\ngBf9un2bMdawj13V/dWUx95mlxf9ao53kihSF2A15Z555z6/nDnTNzOzPo53NuF4Z+v7O96x1Bzv\nVPORvplZi3jRT6JIXYDVlHvmnfv8cjaVTF/SxZK+IOlZSd+QdI2kHZKOSnpO0pckXdzV/6Ckk2X/\na0cZ22ZH1buazWz6Rsr0JS0Dfx4RhyVtA34AuAP4l4j4bUm3Azsi4oCkq4D7gJ8AdgOPA+8aFN47\n06/btxljbeWzf53p2yQ4069W+0hf0luBn4qIwwARcTYiXgGuB46U3Y4AHym/3g/cX/ZbAU4Ce+uO\nb2ZmWzdKvHMZ8B1JhyX9jaTPSnozsDMiVgEi4gxwSdl/F3Cqa//TZVsLFakLsJpyz7xzn1/OpvEZ\nud
uAq4FbIuKrkj4NHKD/b6paf0stLS0xPz8PwPbt21lYWGBxcRFYn9y0ttcX6Y3b9PTvaqnov7Z9\nvKJ/1f5rbYPHH9/+G2+vt3///Tfa/t72dp3tdWvbzapvHNtFUbC8vMxW1M70Je0E/ioi3llu/ySd\nRf/fAYsRsSppDjgWEXskHQAiIu4q+z8KHIqIJwd8b2f6tfo2Yyxn+paaM/1qteOdMsI5JemKsun9\nwDeAh4Glsu1jwEPl1w8DN0q6QNJlwOXAU3XHNzOzrRv1PP1bgfskHQd+FPgt4C7gZyU9R+eJ4FMA\nEXECeAA4ATwC3NyYw/mpK1IXYDXlnnnnPr+cTSPTJyL+ls4pmBt9oKL/ncCdo4xpubtw4Dn8vuSy\n2Xj42jubcKa/9f1HzfTbmMXaeDnTr+bLMJiZtYgX/SSK1AVYTbln3rnPL2fDPnZe9M3MWsSZ/iac\n6W99f2f6lpoz/Wojnb3TboPPMjEzazLHO7W9RudIovvfsIpJFGRTkHvmnfv8cuZMv4ZB1303M8uJ\nM/3ecck1Z3emb23iTL+aj/TNzFrEi34SReoCrKbcM+/c55czZ/pmZtbHmX7vuOSas89+pn8RnTOm\n1vkibFbFmX41n6dvYzap9y+snSK7bnXVZ1eZbZXjnSSK1AVM0CjvX2i+3DPv3OeXM2f6ZmbWx5l+\n77jkmrPP9ljVfZv482vpOdOv5iN9M7MW8aKfRJG6AKsp98w79/nlzJm+mZn1cabfOy6znX3nOlZ1\n3yb+/Fp6zvSr+UjfWmHQFVTn5uZTl2U2dV70kyhSF9A6q6vPs/H9A522rck98859fjlzpm9mZn2c\n6feOy2xn37mOVd132J+Tqse2iT//Njpn+tV8pG9ZGZTd+xPQzNZ50U+iSF1AJi7sW9wHZffjvP5P\n7pl37vPL2bCPna+yaTOs/8qbnRjIzKqMnOlLOg/4KvBCROyXtAP4PHApsALcEBGvlH0PAjcBZ4Hb\nIuJoxfd0pu+xplJXzvlumznTrzaOeOc24ETX9gHg8Yi4EngCOFgWcxVwA7AHuA64Ww5bzcymaqRF\nX9Ju4MPAH3Q1Xw8cKb8+Anyk/Ho/cH9EnI2IFeAksHeU8WdXkboAqyn3zDv3+eVsWufpfxr4JL1/\nR+2MiFWAiDgDXFK27wJOdfU7XbaZmdmU1H4hV9LPAasRcVzS4jm61grQlpaWmJ+fB2D79u0sLCyw\nuNgZZu0Zbdzb69a2FzfZ3th/2P032mz/tba64w+7/8bbm7J/Vf/x7L+Vn5fFxcWJ/fw1YTun+a1b\n225WfePYLoqC5eVlgDfWy83UfiFX0m8Bv0znRdk3AW8Bvgj8OLAYEauS5oBjEbFH0gEgIuKucv9H\ngUMR8eSA7+0Xcj3WVOrK+UW9NvMLudVqxzsRcUdEvCMi3gncCDwREb8C/CmwVHb7GPBQ+fXDwI2S\nLpB0GXA58FTd8WdbkboAqyn3zDv3+eUs5Xn6nwIekHQT8DydM3aIiBOSHqBzps/rwM1JDufNzFrM\n197pHZfUcYPHcrxjo3O8U82XYTAzaxEv+kkUqQuwmnLPvHOfX858PX0zM+vjTL93XFJnzB7Lmb6N\nzpl+NR/pm5m1iBf9JIrUBVhNuWfeuc8vZ870zcysjzP93nFJnTF7LGf6Njpn+tV8pG8t1v9xi5KY\nm5tPXZjZxHjRT6JIXYAB6x+32Puv8zm7g+Weeec+v5w50zerrf8vAB/9Wy6c6feOS+qM2WM1t64m\n/q7YYM70q/lI38ysRbzoJ1GkLsBqyj3zzn1+OXOmb2ZmfZzp947LLGXM7RmrGXU18XfFBnOmX81H\n+mZmLeJFP4kidQFW0zQy77m5+WRvGnOmP7tSfkaumY2g8+aw/ghidfWcf7WbDcWZfu+4zFLG3J6x\nmlHXtH4m25pHj1Nb70Nn+mZm1sOLfhJF6gKsptwz79znlzOfp38O
VS+UmZnlrpWZ/rnyvlnKmNsz\nVjPqmsTP5NzcfMVVPduXR4+TM/1qPnvHbAqqF3cY/GRkNhmtjHfSK1IXYDXVzbzXT8Pc+K9ZnOnP\nLmf6ZmbWx5l+7y0D2pubMbdnrGbUNezP5FZz+q3U1cTf1yZypl+t9pG+pN2SnpD0DUnPSLq1bN8h\n6aik5yR9SdLFXfsclHRS0rOSrq07tlkTVJ0FNjjKMWuGUeKds8AnIuI9wH8EbpH0buAA8HhEXAk8\nARwEkHQVcAOwB7gOuFutPU+ySF2A1dSdm85KTr8VzvRn18SvvRMRZ4Az5devSnoW2A1cD/x02e0I\nnRXuALAfuD8izgIrkk4Ce4En69ZgNj0X+r0cloWxvJAraR5YAL4C7IyIVXjjieGSstsu4FTXbqfL\nthZaTF2Abdlr5HREX2VxcTF1CVbTsI/dyIu+pB8EHgRui4hX6f9tyPO3w8xsBo305ixJ2+gs+H8U\nEQ+VzauSdkbEqqQ54J/L9tPA27t23122DbS0tMT8/DwA27dvZ2Fh4Y1nsrXsqu52R8H6EXexYfS1\n7Y23b9Z/2P0/s8X919rqjj/s/htvb8r+Vf0ntf9a26D9N+671f0H9d/a/qP+/J9ruzsXnsT3n+b2\nurXtZtU3rsdreXmZrRjplE1J9wLfiYhPdLXdBbwYEXdJuh3YEREHyhdy7wOuoRPrPAa8a9C5mfmf\nslkA+6Y01qT6tu+UzU5bwfpiPO374CI6MdO6nTsv5cyZlQF96ymKIouIp42nbBZFwb59+zY9ZbP2\noi/pfcBfAM+wHnLeATwFPEDnqP554IaIeLnc5yDwceB1OnHQ0Yrvnfmi77HyqKsZ90Gui9go2rjo\nw3Dn6fvNWb23DGifpYUh17GaWlcz7oMm/g6n5kW/mi/DkESRugCrrUhdwET5PP3Z5WvvmJlZH8c7\nvbcMaJ+lCCDXsZpaVzPugyb+DqfmeKeaj/TNZtqFfdf+mZubT12UNZgX/SSK1AVYbUXqAjbof6dw\n9Ye1bM6Z/uxypm/WWv1H//4LwNY40++9ZUD7LOW+uY7V1Lpm7z5o4u/7JDjTr+YjfTOzFvGin0SR\nugCrrUhdwFhVfRCMo6DZ40zfzDbV/0Ewxxj1xWBrNmf6vbcMaJ+l3DfXsZpa1+zdBxt/r3LNvnOd\n12aGyfRHurSymc0Sf/qXOd5JpEhdgNVWpC5gBMN8+lcxzYJsjJzpm9kIxv9OX79o3AzO9HtvGdA+\nS7lvrmM1ta623QejfYjLNHN2Z/rVnOmb2ZDW4qF1q6t+jWDWON5JokhdgNVWpC5gwoqxfJdBUY5N\n1rCZvo/0zWzs1s//7+aFvwmc6ffeMqC9qflqm8Zqal2+Dwbl/Ovqv39gVM70q/lI38xG0J/zd/io\nvqmc6SdRpC7AaitSFzBhReoCrCafp1/yC0pmTedP/5qm7DP9wdlervlqrmM1tS7fB5Mca5Q1wJl+\nteyP9M0sD35H73h40U+iSF2A1VakLmDCitQFlPojn/7LQHf+ra6ecYSLz9M3s5k26KygqsV8K33N\nmf6m7c3NPNszVlPr8n0wi2M1cc0bF2f6ZmbWY+qLvqQPSfp7Sd+UdPu0x2+GInUBVluRuoAJK1IX\nYDU18jx9SecBvwt8EHgP8FFJ755mDc1wPHUBVlvuj13u88vX8ePDPXbTfiF3L3AyIp4HkHQ/cD3w\n98N+g1dffZUHH3ywr/0Tn7iDl1769rjqnLCXUxdgteX+2OU+v3y9/PJwj920F/1dwKmu7RfoPBEM\nbXl5md/8zd9j27Zrulq/z7/+67fxNUDMzM5t5k7ZPP/885G+x/nn/0tX6/eT1VPPSuoCrLaV1AVM\n2ErqAqymlZWVofpNe9E/Dbyja3t32dZnszdYfO97K4P2qug9qH2afT1WXnUdmeJYdfuOsv+RivZJ\njLXVvqOP1dY3b62Z6nn6ks4H
ngPeD3wbeAr4aEQ8O7UizMxabKpH+hHx/yT9J+AonTOH7vGCb2Y2\nPY18R66ZmU1Go96Rm/MbtyTdI2lV0tdT1zIJknZLekLSNyQ9I+nW1DWNk6QLJT0p6Wvl/A6lrmnc\nJJ0n6W8kPZy6lnGTtCLpb8vH76nU9YybpIslfUHSs+Xv4DWVfZtypF++ceubdPL+fwKeBm6MiKHP\n4W8yST8JvArcGxHvTV3PuEmaA+Yi4rikHwT+Grg+l8cPQNKbI+K75WtTfwncGhHZLCCSfgP4MeCt\nEbE/dT3jJOkfgR+LiJdS1zIJkpaBP4+Iw5K2AW+OiP8zqG+TjvTfeONWRLwOrL1xKwsR8WUgyx84\ngIg4ExHHy69fBZ6l876MbETEd8svL6TzelgzjpjGQNJu4MPAH6SuZUJEs9a7sZH0VuCnIuIwQESc\nrVrwoVl3wqA3bmW1aLSFpHlgAXgybSXjVcYfXwPOAI9FxNOpaxqjTwOfJKMnsg0CeEzS05J+PXUx\nY3YZ8B1Jh8t47rOS3lTVuUmLvmWgjHYeBG4rj/izERHfj4h/T+f9JddIuip1TeMg6eeA1fIvNZHn\n29jfFxFX0/lr5pYybs3FNuBq4PfKOX4XOFDVuUmL/tBv3LJmKrPEB4E/ioiHUtczKeWfzseAD6Wu\nZUzeB+wvc+8/BvZJujdxTWMVEd8u///fwBfZ4uVfGu4F4FREfLXcfpDOk8BATVr0nwYul3SppAuA\nG4HcziLI9ShqzR8CJyLif6YuZNwk/ZCki8uv3wT8LFu4UGCTRcQdEfGOiHgnnd+7JyLiV1PXNS6S\n3lz+BYqkHwCuBf4ubVXjExGrwClJV5RN7wdOVPVvzLV3cn/jlqTPAYvA2yR9Czi09sJLDiS9D/gl\n4Jky9w7gjoh4NG1lY/NvgSPlWWbnAZ+PiEcS12TD2Ql8UVLQWfPui4ijiWsat1uB+yT9G+AfgV+r\n6tiYUzbNzGzymhTvmJnZhHnRNzNrES/6ZmYt4kXfzKxFvOibmbWIF30zsxbxom9m1iJe9M3MWuT/\nA5yV7vDKKik8AAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0xaa1c2b0>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Histogram of the regression target y (50 bins) to inspect its distribution.\n",
    "pd.Series(y).hist(bins=50)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Bin the continuous target at the integer edges 0..5 for stratification.\n",
    "bins = np.arange(6)\n",
    "binned_y = np.digitize(y, bins)\n",
    "\n",
    "from sklearn.model_selection import train_test_split\n",
    "\n",
    "# This overwrites X_train / X_test / y_train / y_test. random_state is fixed\n",
    "# so re-running the cell always yields the same 80/20 partition.\n",
    "X_train, X_test, y_train, y_test = train_test_split(\n",
    "    X, y, test_size=0.2, stratify=binned_y, random_state=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Binary target over the full data set: 1 where y >= 5, otherwise 0.\n",
    "y_binary = np.where(y >= 5, 1, 0)\n",
    "\n",
    "train_shape = X_train.shape[0]\n",
    "\n",
    "# train_test_split shuffles the rows, so the labels must be derived from the\n",
    "# y_train / y_test arrays returned by the same split as X_train / X_test.\n",
    "# Slicing y_binary by position would pair each row with another row's label.\n",
    "y_train_binned = np.where(y_train >= 5, 1, 0)\n",
    "y_test_binned = np.where(y_test >= 5, 1, 0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "RandomizedSearchCV(cv=3, error_score='raise',\n",
       "          estimator=GradientBoostingClassifier(criterion='friedman_mse', init=None,\n",
       "              learning_rate=0.1, loss='deviance', max_depth=3,\n",
       "              max_features=None, max_leaf_nodes=None,\n",
       "              min_impurity_decrease=0.0, min_impurity_split=None,\n",
       "              min_samples_leaf=1, min_samples_split=2,\n",
       "              min_weight_fraction_leaf=0.0, n_estimators=100,\n",
       "              presort='auto', random_state=None, subsample=1.0, verbose=0,\n",
       "              warm_start=True),\n",
       "          fit_params=None, iid=True, n_iter=10, n_jobs=-1,\n",
       "          param_distributions={'loss': ['deviance'], 'learning_rate': [0.1, 0.2, 0.3, 1], 'min_samples_leaf': [1, 2, 3, 10], 'n_estimators': [100], 'max_features': ['log2', 0.5, 1.0], 'max_depth': [2, 3, 6]},\n",
       "          pre_dispatch='2*n_jobs', random_state=None, refit=True,\n",
       "          return_train_score=True, scoring=None, verbose=0)"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sklearn.ensemble import GradientBoostingClassifier\n",
    "from sklearn.model_selection import RandomizedSearchCV\n",
    "# Search space for the classifier; 'loss' has a single candidate value.\n",
    "param_dist = {\n",
    "    'max_features': ['log2', 0.5, 1.0],\n",
    "    'max_depth': [2, 3, 6],\n",
    "    'min_samples_leaf': [1, 2, 3, 10],\n",
    "    'n_estimators': [100],\n",
    "    'learning_rate': [0.1, 0.2, 0.3, 1],\n",
    "    'loss': ['deviance'],\n",
    "}\n",
    "\n",
    "# random_state is fixed on both the search and the estimator so that the\n",
    "# 10 sampled candidates and their scores are the same on every run.\n",
    "pre_gs_inst = RandomizedSearchCV(\n",
    "    GradientBoostingClassifier(warm_start=True, random_state=0),\n",
    "    param_distributions=param_dist,\n",
    "    cv=3,\n",
    "    n_iter=10,\n",
    "    n_jobs=-1,\n",
    "    random_state=0)\n",
    "pre_gs_inst.fit(X_train, y_train_binned)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'learning_rate': 0.2,\n",
       " 'loss': 'deviance',\n",
       " 'max_depth': 3,\n",
       " 'max_features': 1.0,\n",
       " 'min_samples_leaf': 10,\n",
       " 'n_estimators': 100}"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Show the best_params_ attribute of the fitted classifier search.\n",
    "pre_gs_inst.best_params_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Final classifier, configured with plain keyword arguments instead of an\n",
    "# unpacked dict literal. random_state is fixed so the fit is reproducible.\n",
    "gbc = GradientBoostingClassifier(\n",
    "    learning_rate=0.2,\n",
    "    loss='deviance',\n",
    "    max_depth=2,\n",
    "    max_features=1.0,\n",
    "    min_samples_leaf=2,\n",
    "    n_estimators=1000,\n",
    "    warm_start=True,\n",
    "    random_state=0).fit(X_train, y_train_binned)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.93604651162790697"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Predict the binary labels for the held-out rows (reuses the name y_pred).\n",
    "y_pred = gbc.predict(X_test)\n",
    "\n",
    "from sklearn.metrics import accuracy_score\n",
    "# Accuracy of the predictions against the binned test labels.\n",
    "accuracy_score(y_test_binned, y_pred)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
